package org.biogroovy.io.eutils

import javax.xml.namespace.QName
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.xpath.*

import org.biogroovy.conf.BioGroovyConfig
import org.biogroovy.eutils.EUtilsURLFactory
import org.biogroovy.io.AbsXmlReader
import org.biogroovy.io.IFetcher;
import org.biogroovy.models.*
import org.w3c.dom.Node
import org.w3c.dom.NodeList

/**
 * Reader that turns GBSeq XML into {@link Protein} objects.
 * <p>
 * The XML can come from a local file, an arbitrary input stream, or an EUtils
 * EFetch request (<code>rettype=gp</code>, <code>retmode=xml</code>) built by
 * {@link #getUrl(String, Map)}.  Every element matched by {@link #ROOT_PATH}
 * yields one Protein.
 */
class GBSeqProteinReader extends AbsXmlReader<Protein>{

	/** Value assigned to the reader's <code>databaseName</code> property in the constructor. */
	private static final String DATABASE_NAME = "gbseqprotein"

	/**
	 * A collection of XPath expressions used to extract information from the EUtils XML response.
	 * Each key maps to a field name in the Protein object, each value is an XPath expression
	 * (passed to <code>parseData</code> together with a node selected by {@link #ROOT_PATH}).
	 */
	static final Map<String, String> XPATH_MAP = [
		accession:'GBSeq_primary-accession',
		sequence:'GBSeq_sequence',
		species:'GBSeq_organism',
		name:'GBSeq_definition'
	];

	/**
	 * A collection of XPath node types.  Each key maps to a field name in the Protein class,
	 * each value is the XPathConstants return type requested for that field.
	 */
	static final Map<String, QName> NODE_TYPE_MAP = [
		accession:XPathConstants.STRING,
		sequence:XPathConstants.STRING,
		species:XPathConstants.STRING,
		name:XPathConstants.STRING
	]

	/** XPath selecting each protein record element in a GBSeq document. */
	static final String ROOT_PATH = "//GBSet/GBSeq";

	/** Value sent as the <code>tool</code> request parameter; read from the configuration. */
	String tool = null;

	/** Value sent as the <code>email</code> request parameter; read from the configuration. */
	String email = null;

	/**
	 * Constructor.  Sets the database name and copies <code>eutils.tool</code> and
	 * <code>eutils.email</code> from the BioGroovy configuration.
	 */
	public GBSeqProteinReader(){
		this.databaseName = DATABASE_NAME;
		ConfigObject conf = BioGroovyConfig.getConfig();
		this.tool = conf.eutils.tool
		this.email = conf.eutils.email
	}

	/**
	 * This method retrieves a protein record through EFetch and parses the contents.
	 * It is equivalent to {@link #fetch(String)}.
	 * @param id  The ID of the protein record to be retrieved and parsed.
	 * @return the first protein in the response, or <code>null</code> if the response
	 *         contains no record.
	 * @throws IOException if there is a problem retrieving or reading the response.
	 */
	public Protein read(String id) throws IOException{
		return fetch(id);
	}

	/**
	 * This method reads a GBSeq protein file and returns a Protein object.
	 * The file stream is closed before the method returns.
	 * @param file  The file to be read.
	 * @return the first protein in the file, or <code>null</code> if the file contains no record.
	 * @throws IOException if the file cannot be opened or read.
	 */
	public Protein read(File file) throws IOException{
		// withStream closes the stream even when parsing throws.
		return new FileInputStream(file).withStream { InputStream stream -> read(stream) };
	}

	/**
	 * This method reads a GBSeq protein input stream and returns a Protein object.
	 * The stream is not closed here; it remains the caller's responsibility.
	 * @param  is The input stream to be read.
	 * @return the first protein found in the stream, or <code>null</code> if there is none.
	 * @throws IOException if the stream cannot be read.
	 */
	public Protein read(InputStream is) throws IOException{
		List<Protein> protList = readList(is);
		// readList never returns null but may return an empty list; get(0) on an
		// empty list would throw IndexOutOfBoundsException.
		return protList ? protList.get(0) : null;
	}

	/**
	 * This method reads a list of proteins from the input stream.
	 * The stream is not closed here; it remains the caller's responsibility.
	 * XML parser and XPath exceptions propagate unchanged.
	 * @param inputStream  The input stream to be read
	 * @return a list of Protein objects, one per element matched by {@link #ROOT_PATH};
	 *         empty if nothing matches.
	 * @throws IOException if the stream cannot be read.
	 */
	public List<Protein> readList(InputStream inputStream) throws IOException{
		List<Protein> proteinList = new ArrayList<Protein>();

		// NOTE(review): the parser uses default settings (DTDs and external entities are
		// not restricted) -- confirm whether hardening is wanted for untrusted input.
		def builder  = DocumentBuilderFactory.newInstance().newDocumentBuilder();
		def root     = builder.parse(inputStream).documentElement

		XPath xpath = XPathFactory.newInstance().newXPath();
		NodeList nodeList = xpath.evaluate(ROOT_PATH, root, XPathConstants.NODESET);
		for(int i=0; i < nodeList.getLength(); i++){
			Protein protein = new Protein()
			parse(protein, nodeList.item(i));
			proteinList.add(protein);
		}

		return proteinList;
	}

	/**
	 * Populates a protein from a single record node using {@link #XPATH_MAP}
	 * and {@link #NODE_TYPE_MAP}.
	 * @param protein  The protein to populate.
	 * @param node  The node the XPath expressions are evaluated against.
	 */
	@Override
	public void parse(Protein protein, Node node) {
		parseData(node, protein, XPATH_MAP, NODE_TYPE_MAP);
	}

	/**
	 * Retrieves a protein record through EFetch and parses the first record in the response.
	 * The response stream is closed before the method returns.
	 * @param id  The ID passed to EFetch.
	 * @return the first protein in the response, or <code>null</code> if the response
	 *         contains no record.
	 * @throws IOException if there is a problem retrieving or reading the response.
	 */
	@Override
	public Protein fetch(String id) throws IOException {
		URL url = getUrl(id, [tool:this.tool, email:this.email])
		return url.openStream().withStream { InputStream stream -> read(stream) };
	}

	/**
	 * Builds the EFetch URL for the given ID.
	 * @param id  The ID placed in the <code>id</code> request parameter.
	 * @param paramMap  Additional request parameters; entries here override the defaults
	 *                  (<code>db</code>, <code>id</code>, <code>rettype</code>,
	 *                  <code>retmode</code>).  May be <code>null</code> or empty.
	 * @return the URL to request.
	 */
	@Override
	public URL getUrl(String id, Map<String, String> paramMap) {
		Map<String, String> map = [db:EUtilsURLFactory.DB_PROTEIN, id:id, rettype:'gp', retmode:'xml']
		if (paramMap) {
			map.putAll(paramMap)
		}
		String url = EUtilsURLFactory.getURL(EUtilsURLFactory.EFETCH, map);
		return new URL(url);
	}

	/**
	 * Retrieves protein records through EFetch and parses every record in the response.
	 * The response stream is closed before the method returns.
	 * @param id  The ID passed to EFetch.
	 * @return a list with one Protein per record in the response; empty if there are none.
	 * @throws IOException if there is a problem retrieving or reading the response.
	 */
	@Override
	public List<Protein> fetchAll(String id) throws IOException {
		URL url = getUrl(id, [tool:this.tool, email:this.email])
		return url.openStream().withStream { InputStream stream -> readList(stream) }
	}

	/**
	 * @return a new, independently configured GBSeqProteinReader.
	 */
	@Override
	public IFetcher<Protein> getNewInstance() {
		return new GBSeqProteinReader();
	}

}

